# World Politics
# Replication Code: "Agrarian Elites, Wealth Inequality, and State Capacity: Evidence from Early 20th-Century Brazil"

library(dplyr)
library(lmtest)
library(multiwayvcov)
library(AER)
library(stargazer)
library(ggplot2)
library(scales)
library(lfe)
library(rgdal)
library(rgeos)
library(spdep)
library(tidyr)
library(rgeos)
library(rgdal)
library(maps) 
library(mapdata)
library(maptools)


# Figure 1: Maps

municipal1920 <- readOGR(dsn = "~/map", layer = "mun1920")

# Figure 1(a) Land Gini

qland <- quantile(municipal1920$land_gn, na.rm=T)

# tiff("Figure1a.tiff", units="in", width=5, height=5, res=300)

plot(municipal1920, border="grey")
plot(municipal1920[which(municipal1920$land_gn <= qland[[1]]),], add=TRUE, col="#D5E8F3FF", border=F) 
plot(municipal1920[which(municipal1920$land_gn <= qland[[2]] & municipal1920$land_gn > qland[[1]]),], add=TRUE, col="#ACCCE4FF", border=F)
plot(municipal1920[which(municipal1920$land_gn <= qland[[3]] & municipal1920$land_gn > qland[[2]]),], add=TRUE, col="#7FABD3FF", border=F)
plot(municipal1920[which(municipal1920$land_gn <= qland[[4]] & municipal1920$land_gn > qland[[3]]),], add=TRUE, col="#5087C1FF", border=F)
plot(municipal1920[which(municipal1920$land_gn > qland[[4]]),], add=TRUE, col="#325FA2FF", border=F)
legend("bottomleft", title="Land Gini\nQuintiles", 
       legend=c("< 0.13", "> 0.13 and < 0.54", "> 0.54 and < 0.63", "> 0.63 and < 0.71", "> 0.71"), 
       bty = "n", inset=.02, fill=c("#D5E8F3FF", "#ACCCE4FF", "#7FABD3FF", "#5087C1FF", "#325FA2FF"), cex=0.7)
# dev.off()

# Figure 1(b) Tax revenue per capita

qrev <- quantile(municipal1920$cpt_rvn, na.rm=T)

# tiff("Figure1b.tiff", units="in", width=5, height=5, res=300)

plot(municipal1920, border="grey")
plot(municipal1920[which(municipal1920$cpt_rvn <= qrev[[1]]),], add=TRUE, col="#F6F6F6FF", border=F)
plot(municipal1920[which(municipal1920$cpt_rvn <= qrev[[2]] & municipal1920$cpt_rvn > qrev[[1]]),], add=TRUE, col="#FFD4D6FF", border=F)
plot(municipal1920[which(municipal1920$cpt_rvn <= qrev[[3]] & municipal1920$cpt_rvn > qrev[[2]]),], add=TRUE, col="#FFA6AAFF", border=F)
plot(municipal1920[which(municipal1920$cpt_rvn <= qrev[[4]] & municipal1920$cpt_rvn > qrev[[3]]),], add=TRUE, col="#FF7078FF", border=F)
plot(municipal1920[which(municipal1920$cpt_rvn > qrev[[4]]),], add=TRUE, col="#EE253AFF", border=F)
legend("bottomleft", title="Tax revenue per capita\nQuintiles", legend=c("< 0.07", "> 0.07 and < 0.96", "> 0.96 and < 2.16", "> 2.16 and < 4.98", "> 4.98"), 
       bty = "n", inset=.02, fill=c("#F6F6F6FF","#FFD4D6FF","#FFA6AAFF","#FF7078FF","#EE253AFF"), cex=0.65)
# dev.off()

rm(municipal1920, qland, qrev)

# Figure 2: Local Land Gini and Outcome Measures

data <- read.csv("brazil-first-republic.csv")

# Figure 2(a): Tax Revenue per capita (log)

# tiff("Figure2a.tiff", units="in", width=5, height=7.5, res=300)

ggplot(data = data, aes(x = land.gini, y = log(capita.revenues))) + 
  geom_point(size= 1.5, color="gray20", na.rm=TRUE) +
  geom_smooth(method=lm, color="firebrick3", fill="gray80", linewidth = 0.7, na.rm=TRUE) +
  scale_x_continuous("Land Gini, 1920") +
  scale_y_continuous("Tax Revenue\nper Capita (log)",
                     labels = label_number(accuracy = 0.1)) + 
  theme(legend.position = "none",
        axis.title = element_text(size=15),
        axis.text.x = element_text(size=13), 
        axis.text.y = element_text(size=13), 
        axis.title.y = element_blank(),
        plot.background = element_rect(fill = "white"), 
        panel.background = element_rect(fill = "white", colour = "black",
                                        linewidth = 0.4, linetype = "solid"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth = .3, color="azure2" ), 
        panel.grid.minor = element_blank(),
        plot.margin = margin(15, 15, 15, 15)) 
# dev.off()

# Figure 2(b): Tax Revenue / Output (log)

# tiff("Figure2b.tiff", units="in", width=5, height=7.5, res=300)

ggplot(data = data, aes(x = land.gini, y = log(revenues.gdp))) + 
  geom_point(size= 1.5, color="gray20", na.rm=TRUE) +
  geom_smooth(method=lm, color="firebrick3", fill="gray80", linewidth = 0.7, na.rm=TRUE) +
  scale_x_continuous("Land Gini, 1920") +
  scale_y_continuous("Tax Revenue/Output\n(log)",
                     labels = label_number(accuracy = 0.1)) + 
  theme(legend.position = "none",
        axis.title = element_text(size=15),
        axis.text.x = element_text(size=13), 
        axis.text.y = element_text(size=13), 
        axis.title.y = element_blank(),
        plot.background = element_rect(fill = "white"), 
        panel.background = element_rect(fill = "white", colour = "black",
                                        linewidth = 0.4, linetype = "solid"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth = .3, color="azure2" ), 
        panel.grid.minor = element_blank(),
        plot.margin = margin(15, 15, 15, 15)) 
# dev.off()

# Figure 2(c): Total Revenue minus Export taxes per capita (log)

# tiff("Figure2c.tiff", units="in", width=5, height=7.5, res=300)

ggplot(data = data, aes(x = land.gini, y = log(rev.minus.export))) + 
  geom_point(size= 1.5, color="gray20", na.rm=TRUE) +
  geom_smooth(method=lm, color="firebrick3", fill="gray80", linewidth = 0.7, na.rm=TRUE) +
  scale_x_continuous("Land Gini, 1920") +
  scale_y_continuous("Tax Revenue excluding Export Taxes\nper Capita  (log)",
                     labels = label_number(accuracy = 0.1)) + 
  theme(legend.position = "none",
        axis.title = element_text(size=15),
        axis.text.x = element_text(size=13), 
        axis.text.y = element_text(size=13), 
        axis.title.y = element_blank(),
        plot.background = element_rect(fill = "white"), 
        panel.background = element_rect(fill = "white", colour = "black",
                                        linewidth = 0.4, linetype = "solid"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth = .3, color="azure2" ), 
        panel.grid.minor = element_blank(),
        plot.margin = margin(15, 15, 15, 15)) 
# dev.off()

# Figure 2(d):  Number of municipal officials per capita (log)

# tiff("Figure2d.tiff", units="in", width=5, height=7.5, res=300)

ggplot(data = data[which(data$admpub.municipal != 0),], aes(x = land.gini, y = log(admpub.municipal/pop.1920))) + 
  geom_point(size= 1.5, color="gray20", na.rm=TRUE) +
  geom_smooth(method=lm, color="firebrick3", fill="gray80", linewidth = 0.7, na.rm=TRUE) +
  scale_x_continuous("Land Gini, 1920") +
  scale_y_continuous("Number of State\nOfficials (log)",
                     labels = label_number(accuracy = 0.1)) + 
  theme(legend.position = "none",
        axis.title = element_text(size=15),
        axis.text.x = element_text(size=13), 
        axis.text.y = element_text(size=13), 
        axis.title.y = element_blank(),
        plot.background = element_rect(fill = "white"), 
        panel.background = element_rect(fill = "white", colour = "black",
                                        linewidth = 0.4, linetype = "solid"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth = .3, color="azure2" ), 
        panel.grid.minor = element_blank(),
        plot.margin = margin(15, 15, 15, 15)) 
# dev.off()

# Table 1: Land Inequality and Local Fiscal and Administrative Capacity

# Table 1 Panel A: Tax revenue per capita (log) 

main0 <- lm(log(capita.revenues) ~ land.gini , data = data)
cl.robust.se.main0 <- coeftest(main0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2] 

main1 <- lm(log(capita.revenues) ~ land.gini + as.factor(UF1920), data = data)
cl.robust.se.main1 <- coeftest(main1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

main2 <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) 
            + year.foundation + num.landowners + avg.value.hectar 
            + log(area.mun) + pc.PIB.ind + as.factor(UF1920), data = data)
cl.robust.se.main2 <- coeftest(main2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

main3 <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) 
            + year.foundation + num.landowners + avg.value.hectar 
            + log(area.mun) + pc.PIB.ind 
            + latitude + longitude + I(latitude^2) + I(longitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.robust.se.main3 <- coeftest(main3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

main4 <- lm(log(capita.revenues) ~ land.gini 
            + productivity + log(gdp.pc) 
            + year.foundation + num.landowners + avg.value.hectar 
            + log(area.mun) 
            + latitude + longitude + I(latitude^2) + I(longitude^2)
            + dist.capital + dist.coast + as.factor(UF1920)
            , data = data[which(data$pc.PIB.ind < .05),])
cl.robust.se.main4 <- coeftest(main4, vcov=function(x) cluster.vcov(x, data[which(data$pc.PIB.ind < .05),"UF1920"]))[,2]

stargazer(main0, main1, main2, main3, main4, 
          se=list(cl.robust.se.main0, cl.robust.se.main1, cl.robust.se.main2, cl.robust.se.main3, cl.robust.se.main4), 
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Tax Revenue per capita (log)",
          dep.var.labels = "Tax Revenue per capita (log)",
          omit.stat = c("adj.rsq","ll", "f", "ser"), 
          omit = "Constant", add.lines = list(c("Additional covariates", "No", "No", "Yes", "Yes", "Yes"),
                                              c("Geographic characteristics", "No", "No", "No", "Yes", "Yes"),
                                              c("State fixed effects", "No", "Yes", "Yes", "Yes", "Yes"),
                                              c("Sample", "Full", "Full", "Full", "Full", "Rural")))

rm(main0, main1, main2, main3, main4, cl.robust.se.main0, cl.robust.se.main1, cl.robust.se.main2, cl.robust.se.main3, cl.robust.se.main4)

# Table 1 Panel B: Total Tax Revenue/Output (log)

gdp0 <- lm(log(revenues.gdp) ~ land.gini , data = data)
cl.gdp0 <- coeftest(gdp0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2] 

gdp1 <- lm(log(revenues.gdp)  ~ land.gini + as.factor(UF1920), data = data)
cl.gdp1 <- coeftest(gdp1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

gdp2 <- lm(log(revenues.gdp) ~ land.gini + productivity + log(pop.1920) 
           + year.foundation + num.landowners + avg.value.hectar 
           + log(area.mun) + pc.PIB.ind + as.factor(UF1920), data = data)
cl.gdp2 <- coeftest(gdp2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

gdp3 <- lm(log(revenues.gdp) ~ land.gini + productivity + log(pop.1920) 
                  + year.foundation + num.landowners + avg.value.hectar 
                  + log(area.mun) + pc.PIB.ind + as.factor(UF1920)
                  + latitude + longitude + I(latitude^2) + I(longitude^2)
                  + dist.capital + dist.coast, data = data)
cl.gdp3 <- coeftest(gdp3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

gdp4 <- lm(log(revenues.gdp) ~ land.gini 
           + productivity + log(pop.1920) 
           + year.foundation + num.landowners + avg.value.hectar 
           + log(area.mun) 
           + latitude + longitude + I(latitude^2) + I(longitude^2)
           + dist.capital + dist.coast + as.factor(UF1920)
           , data = data[which(data$pc.PIB.ind < .05),])
cl.gdp4 <- coeftest(gdp4, vcov=function(x) cluster.vcov(x, data[which(data$pc.PIB.ind < .05),"UF1920"]))[,2]


stargazer(gdp0, gdp1, gdp2, gdp3, gdp4,
          se=list(cl.gdp0, cl.gdp1, cl.gdp2, cl.gdp3,  cl.gdp4), 
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Tax Revenue/GDP (log)",
          dep.var.labels = "Tax Revenue/GDP (log)",
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("State fixed effects", "No", "Yes", "Yes", "Yes", "Yes"),
                           c("Additional covariates", "No", "No", "Yes", "Yes", "Yes"),
                           c("Geographic characteristics", "No", "No", "No", "Yes", "Yes"),
                           c("Sample", "Full", "Full", "Full", "Full", "Rural")))

rm(gdp0, gdp1, gdp2, gdp3, gdp4, cl.gdp0, cl.gdp1, cl.gdp2, cl.gdp3, cl.gdp4)

# Table 1 Panel C: Total Revenue minus Export Taxes per capita (log)

exp0 <- lm(log(rev.minus.export) ~ land.gini , data = data)
cl.exp0 <- coeftest(exp0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2] 

exp1 <- lm(log(rev.minus.export)  ~ land.gini + as.factor(UF1920), data = data)
cl.exp1 <- coeftest(exp1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

exp2 <- lm(log(rev.minus.export) ~ land.gini + productivity + log(gdp.pc) + year.foundation
           + num.landowners + avg.value.hectar + scale(area.mun)
           + pc.PIB.ind + as.factor(UF1920), data = data)
cl.exp2 <- coeftest(exp2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

exp3 <- lm(log(rev.minus.export) ~ land.gini + productivity + log(gdp.pc) + year.foundation
           + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
           + as.factor(UF1920) + latitude + longitude + I(longitude^2) + I(latitude^2)
           + dist.capital + dist.coast, data = data)
coeftest(exp3, vcov=function(x) cluster.vcov(x, data$UF1920))
cl.exp3 <- coeftest(exp3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

exp4 <- lm(log(rev.minus.export) ~ land.gini + productivity + log(gdp.pc) + year.foundation
           + num.landowners + avg.value.hectar + log(area.mun)
           + as.factor(UF1920) + latitude + longitude + I(longitude^2) + I(latitude^2)
           + dist.capital + dist.coast, data = data[which(data$pc.PIB.ind < .05),])
cl.exp4 <- coeftest(exp4, vcov=function(x) cluster.vcov(x, data[which(data$pc.PIB.ind < .05),"UF1920"]))[,2]

stargazer(exp0, exp1, exp2, exp3, exp4,
          se=list(cl.exp0, cl.exp1, cl.exp2, cl.exp3, cl.exp4), 
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Total revenue minus export taxes, per capita (log)",
          dep.var.labels = "Total Revenue excl. Export Taxes, per capita (log)",
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("State fixed effects", "No", "Yes", "Yes", "Yes", "Yes"),
                           c("Additional covariates", "No", "No", "Yes", "Yes", "Yes"),
                           c("Geographic covariates", "No", "No", "No", "Yes", "Yes"),
                           c("Sample", "Full", "Full", "Full", "Full", "Rural")))

rm(exp0, exp1, exp2, exp3, exp4, cl.exp0, cl.exp1, cl.exp2, cl.exp3, cl.exp4)

# Table 1 Panel D: Number of Public officials (log)

adm0 <- lm(log(admpub.municipal+1) ~ land.gini , data = data)
cl.adm0 <- coeftest(adm0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

adm1 <- lm(log(admpub.municipal+1) ~ land.gini + as.factor(UF1920), data = data)
cl.adm1 <- coeftest(adm1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

adm2 <- lm(log(admpub.municipal+1) ~ land.gini + productivity + log(pop.1920) + log(gdp.pc) 
           + year.foundation + num.landowners + avg.value.hectar 
           + log(area.mun) + pc.PIB.ind + as.factor(UF1920), data = data)
cl.adm2 <- coeftest(adm2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

adm3 <- lm(log(admpub.municipal+1) ~ land.gini + productivity + log(pop.1920) + log(gdp.pc) 
           + year.foundation + num.landowners + avg.value.hectar
           + log(area.mun) + pc.PIB.ind + as.factor(UF1920)
           + latitude + longitude + I(latitude^2) + I(longitude^2)
           + dist.capital + dist.coast, data = data)
cl.adm3 <- coeftest(adm3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

adm4 <- lm(log(admpub.municipal+1) ~ land.gini + productivity + log(pop.1920) + log(gdp.pc) 
           + year.foundation + num.landowners + avg.value.hectar 
           + log(area.mun) + as.factor(UF1920) + latitude + longitude + I(latitude^2) + I(longitude^2)
           + dist.capital + dist.coast, data = data[which(data$pc.PIB.ind < .05),])
cl.adm4 <- coeftest(adm4, vcov=function(x) cluster.vcov(x, data[which(data$pc.PIB.ind < .05),"UF1920"]))[,2]

stargazer(adm0, adm1, adm2, adm3, adm4,
          se=list(cl.adm0, cl.adm1, cl.adm2, cl.adm3, cl.adm4), 
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Size of the Municipal Bureaucracy",
          dep.var.labels = "Number of Public Officials (log)",
          omit.stat = c("adj.rsq","ll", "F", "ser"), omit = "Constant", 
          add.lines = list(c("State fixed effects", "No", "Yes", "Yes", "Yes", "Yes"),
                           c("Additional covariates", "No", "No", "Yes", "Yes", "Yes"),
                           c("Geographic characteristics", "No", "No", "No", "Yes", "Yes"),
                           c("Sample", "Full", "Full", "Full", "Full", "Rural")))

rm(adm0, adm1, adm2, adm3, adm4, cl.adm0, cl.adm1, cl.adm2, cl.adm3, cl.adm4)


# Table 2: Expenditure Categories: Public Spending per Capita (log)

sp1 <- lm(log(spd.gov/pop.1920 +1) ~ land.gini 
          + productivity + log(gdp.pc) 
          + year.foundation + num.landowners + avg.value.hectar
          + scale(area.mun) + pc.PIB.ind 
          + latitude + longitude + I(latitude^2) + I(longitude^2)
          + dist.capital + dist.coast + as.factor(UF1920) 
          , data = data)
cl.sp1 <- coeftest(sp1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sp2 <- lm(log(spd.hs/pop.1920 +1) ~ land.gini 
          + productivity + log(gdp.pc) 
          + year.foundation + num.landowners + avg.value.hectar
          + scale(area.mun) + pc.PIB.ind 
          + latitude + longitude + I(latitude^2) + I(longitude^2)
          + dist.capital + dist.coast + as.factor(UF1920) 
          , data = data)
cl.sp2 <- coeftest(sp2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sp3 <- lm(log(spd.pubworks/pop.1920 +1) ~ land.gini 
          + productivity + log(gdp.pc) + year.foundation + num.landowners + avg.value.hectar 
          + scale(area.mun) + pc.PIB.ind 
          + latitude + longitude + I(latitude^2) + I(longitude^2)
          + dist.capital + dist.coast + as.factor(UF1920) 
          , data = data)
cl.sp3 <- coeftest(sp3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sp4 <- lm(log(spd.safety/pop.1920 +1) ~ land.gini 
          + productivity + log(gdp.pc) + year.foundation 
          + num.landowners + avg.value.hectar + scale(area.mun)
          + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
          + dist.capital + dist.coast + as.factor(UF1920) 
          , data = data)
cl.sp4 <- coeftest(sp4, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sp5 <- lm(log(spd.educ/pop.1920 +1) ~ land.gini 
          + productivity + log(gdp.pc) + year.foundation 
          + num.landowners + avg.value.hectar + scale(area.mun)
          + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
          + dist.capital + dist.coast + as.factor(UF1920) 
          , data = data)
cl.sp5 <- coeftest(sp5, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sp6 <- lm(log(spd.int/pop.1920 +1) ~ land.gini 
          + productivity + log(gdp.pc) + year.foundation 
          + num.landowners + avg.value.hectar + scale(area.mun)
          + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
          + dist.capital + dist.coast + as.factor(UF1920) 
          , data = data)
cl.sp6 <- coeftest(sp6, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(sp1, sp2, sp3, sp4, sp5, sp6,
          se=list(cl.sp1, cl.sp2, cl.sp3, cl.sp4, cl.sp5, cl.sp6),
          title = "Expenditure Categories: Public Spending per Capita (log)",
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          dep.var.labels = c("Bureaucracy", "Health and Sanitation", "Public Works", "Public Safety", "Education", "Debt Service"),
          omit.stat = c("ll", "ser", "f", "adj.rsq"), omit = c("Constant"),
          add.lines = list(c("Additional covariates", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Geographic characteristics", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")))

rm(sp1, sp2, sp3, sp4, sp5, sp6, cl.sp1, cl.sp2, cl.sp3, cl.sp4, cl.sp5, cl.sp6)


# Table 3: Moderating Effect of Political Contestation

cond0 <- lm(log(capita.revenues) ~ land.gini*opposition.sh, data = data)
cl.cond0 <- coeftest(cond0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2] 

cond1 <- lm(log(capita.revenues)  ~ land.gini*opposition.sh + as.factor(UF1920), data = data)
cl.cond1 <- coeftest(cond1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

cond2 <- lm(log(capita.revenues) ~ land.gini*opposition.sh
            + productivity + log(gdp.pc) + year.foundation + avg.value.hectar 
            + log(area.mun) + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.cond2 <- coeftest(cond2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(cond0, cond1, cond2,
          se = list(cl.cond0, cl.cond1, cl.cond2),
          keep = c("land.gini", "opposition.sh"),
          covariate.labels = c("Land Gini", "Political Threat", "Land Gini x Political Threat"),
          title = "Moderating effect of Political Contestation: Interaction of Land Inequality and Share of Elections with Competition from an Opposition Party",
          omit.stat = c("adj.rsq","ll", "ser", "f"), omit = c("Constant"),
          dep.var.labels = "Tax Revenue per capita (log)", 
          add.lines = list(c("State fixed effects", "No", "Yes", "Yes"),
                           c("Additional covariates", "No", "No", "Yes")) )

rm(cond0, cond1, cond2, cl.cond0, cl.cond1, cl.cond2)

# Figure 3: Estimated Land Gini Coefficient by Level of Political Contestation

fit_cl <- felm(log(capita.revenues) ~ land.gini*opposition.sh 
               + productivity + log(gdp.pc) + year.foundation + avg.value.hectar
               + log(area.mun) + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
               + dist.capital + dist.coast | UF1920 | 0 | UF1920, 
               data = data)
beta.hat <- coef(fit_cl)
vcov1 <- vcov(fit_cl) 
z0 <- seq(min(data$opposition.sh,  na.rm =T), max(data$opposition.sh, na.rm =T), length.out = 1000)
dy.dx <- beta.hat["land.gini"] + beta.hat["land.gini:opposition.sh"]*z0
se.dy.dx <- sqrt(vcov1["land.gini", "land.gini"] + (z0^2)*vcov1["land.gini:opposition.sh", "land.gini:opposition.sh"] + 2*z0*vcov1["land.gini", "land.gini:opposition.sh"])
upr <- dy.dx + 1.96*se.dy.dx
lwr <- dy.dx - 1.96*se.dy.dx
# Histogram
xmin <- c(0.09, 0.17, 0.25, 0.33, 0.41, 0.49, 0.57) 
xmax <- c(0.17, 0.25, 0.33, 0.41, 0.49, 0.57, 0.65)
ymin <- c(-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5)
ymax <- c(4, 7, 5, 2, 1, 0, 1)/14 - 1.5

# tiff("Figure3.tiff", units="in", width=6, height=4, res=300)
ggplot(data=NULL, aes(x=z0, y=dy.dx)) + 
  labs(x="Share of Elections with an Opposition Party", 
       y="Marginal Effects of Land Gini on\nTax Revenue per capita (log)", 
       title= "", cex = 4) +
  geom_line(aes(z0, dy.dx), color = "firebrick3") +
  geom_hline(yintercept=0, linetype = "dashed", color = "dodgerblue3") +
  geom_ribbon(aes(ymin=lwr, ymax=upr), alpha=0.3, fill = "grey60") + 
  theme(legend.position = "none",
        axis.title = element_text(size=15),
        axis.text.x = element_text(size=13), 
        axis.text.y = element_text(size=13), 
        plot.background = element_rect(fill = "white"), 
        panel.background = element_rect(fill = "white", colour = "white", linewidth = 0.4, linetype = "solid"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth = .3, color="azure2"), 
        panel.grid.minor = element_blank() ) +
  geom_rect(aes(x = NULL, y = NULL, xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), fill = "transparent", color = "grey60")
# dev.off()

rm(fit_cl, beta.hat, dy.dx, lwr, upr, xmax, xmin, ymax, ymin, z0, se.dy.dx, vcov1)


# Supplementary Material

# Appendix A

# Table A.1: Property Tax Rates and Revenues in the State of São Paulo, 1918

fit.rate1 <- lm(log(revenue.property/pop.1920+1) ~ prop.tx.rate 
            , data = data[which(data$UF1920 == 35),])

fit.rate2 <- lm(log(revenue.property/pop.1920+1) ~ prop.tx.rate 
            + log(gdp.pc) + log(pop.1920) + year.foundation
            + scale(area.mun) + pc.PIB.ind 
            + longitude + latitude + I(latitude^2) + I(longitude^2)
            + dist.coast + dist.capital, data = data[which(data$UF1920 == 35),])

stargazer(fit.rate1, fit.rate2, 
          title = "Property Tax Rates and Revenues in the State of São Paulo, 1918",
          keep = c("prop.tx.rate"),
          covariate.labels = "Property Tax Rate",
          dep.var.labels = "Property tax revenue per capita (log)",
          omit.stat = c("adj.rsq","ll", "ser", "f"), omit = c("Constant"),
          add.lines = list(c("Additional covariates", "No", "Yes")))

rm(fit.rate1, fit.rate2)

# Table A.2: Landholding Inequality and Capacity Outcomes

mainx <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) + year.foundation 
            + scale(num.landowners) + scale(avg.value.hectar)
            + log(area.mun) + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.robust.se.mainx <- coeftest(mainx, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

gdpx <- lm(log(revenues.gdp) ~ land.gini + productivity + log(pop.1920)
            + year.foundation + scale(num.landowners) + scale(avg.value.hectar)
            + log(area.mun) + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.gdpx <- coeftest(gdpx, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

expx <- lm(log(rev.minus.export) ~ land.gini + productivity + log(gdp.pc) + year.foundation 
           + scale(num.landowners) + scale(avg.value.hectar)
           + log(area.mun) + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
           + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.expx <- coeftest(expx, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

admx <- lm(log(admpub.municipal+1) ~ land.gini + productivity + log(gdp.pc) + log(pop.1920)
            + year.foundation + scale(num.landowners) + scale(avg.value.hectar)
            + log(area.mun) + pc.PIB.ind + latitude + longitude + I(latitude^2) + I(longitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.admx <- coeftest(admx, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(mainx, gdpx, expx, admx,
          se=list(cl.robust.se.mainx, cl.gdpx, cl.expx, cl.admx), 
          keep = c("land.gini", "num.landowners", "avg.value.hectar"),
          covariate.labels = c("Land Gini", "Number of landowners", "Land Value (average)"),
          title = "Landholding Inequality and Capacity Outcomes",
          dep.var.labels = c("Tax Revenue per capita (log)", "Tax Revenue/Output (log)",
                             "Tax Revenue excl. export taxes (log)", "Size of local bureaucracy (log)"),
          omit.stat = c("adj.rsq","ll", "F", "ser"), omit = "Constant", 
          add.lines = list(c("State fixed effects", "Yes", "Yes", "Yes", "Yes"),
                           c("Additional covariates", "Yes", "Yes", "Yes", "Yes"),
                           c("Geographic characteristics", "Yes", "Yes", "Yes", "Yes"),
                           c("Sample", "Full", "Full", "Full", "Full")))

rm(admx, expx, gdpx, mainx, cl.admx, cl.expx, cl.gdpx, cl.robust.se.mainx)

# Table A.3: Alternative Measures of Landholding Inequality

alt.1 <- lm(log(capita.revenues) ~ log(ratio90.10) + productivity + log(gdp.pc) 
            + year.foundation + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind + electorate 
            + latitude + longitude + I(longitude^2) + I(latitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.alt1 <- coeftest(alt.1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

alt.2 <- lm(log(capita.revenues) ~ log(ratio80.20) + productivity + log(gdp.pc) + year.foundation
            + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind + electorate
            + latitude + longitude + I(longitude^2) + I(latitude^2) 
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.alt2 <- coeftest(alt.2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

alt.3 <- lm(log(capita.revenues) ~ log(qsr+1) + productivity + log(gdp.pc) + year.foundation
            + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind + electorate
            + latitude + longitude + I(longitude^2) + I(latitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.alt3 <- coeftest(alt.3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

alt.4 <- lm(log(capita.revenues) ~ log(Palma) + productivity + log(gdp.pc) + year.foundation
            + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind + electorate
            + latitude + longitude + I(longitude^2) + I(latitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.alt4 <- coeftest(alt.4, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

alt.5 <- lm(log(capita.revenues) ~ scale(farms.largest) + productivity + log(gdp.pc) + year.foundation
            + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind + electorate
            + latitude + longitude + I(longitude^2) + I(latitude^2)
            + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.alt5 <- coeftest(alt.5, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(alt.1, alt.2, alt.3, alt.4, alt.5,
          se = list(cl.alt1, cl.alt2, cl.alt3, cl.alt4, cl.alt5), 
          keep = c("ratio90.10", "ratio80.20", "qsr", "Palma", "farms.largest"),
          covariate.labels = c("90:10 Ratio", "80:20 Ratio", "80S:20S Ratio", "Palma Index", "Farms in largest category"),
          title = "Alternative Measures of Landholding Inequality",
          dep.var.labels = "Tax Revenue per capita (log)",
          column.sep.width = "15pt", 
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("Additional covariates", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects", "Yes", "Yes", "Yes", "Yes", "Yes")))

rm(alt.1, alt.2, alt.3, alt.4, alt.5, cl.alt1, cl.alt2, cl.alt3, cl.alt4, cl.alt5)

# Table A.4: Landholding Inequality and the Number of Local Roads, Minas Gerais (1921)

lm.st0 <- lm(log(streets+1) ~ land.gini, data = data)

lm.st1 <- lm(log(streets+1) ~ land.gini + dist.capital + dist.coast
               + latitude + longitude + I(latitude^2) + I(longitude^2), data = data)

lm.st2 <- lm(log(streets+1) ~ land.gini + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
               + num.landowners + avg.value.hectar + log(area.mun)
               + pc.PIB.ind + dist.capital + dist.coast
               + latitude + longitude + I(latitude^2) + I(longitude^2), data = data)

stargazer(lm.st0, lm.st1, lm.st2,
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Landholding Inequality and Number of Local Roads in the state of Minas Gerais",
          dep.var.labels = "Number of Local Roads (log)",
          omit.stat = c("adj.rsq","ll", "f", "ser"), 
          omit = "Constant", add.lines = list(c("Geographic characteristics", "No", "Yes", "Yes"),
                                              c("Additional covariates", "No", "No", "Yes")))

rm(lm.st0, lm.st1, lm.st2)

# Table A.5: Investigating Substitution Effects: Schools Financed by the State Government

sch0 <- lm(log(state.schools+1) ~ land.gini  + as.factor(UF1920), data = data)
cl.sch0 <- coeftest(sch0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sch1 <- lm(log(state.schools+1) ~ land.gini + + productivity + log(gdp.pc) + log(pop.1920) + year.foundation 
           + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind + as.factor(UF1920), data = data)
cl.sch1 <- coeftest(sch1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sch2 <- lm(log(state.schools+1) ~ land.gini + + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
           + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind + dist.capital + dist.coast
           + latitude + longitude + I(latitude^2) + I(longitude^2) + as.factor(UF1920), data = data)
cl.sch2 <- coeftest(sch2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(sch0, sch1, sch2, 
          se = list(cl.sch0, cl.sch1, cl.sch2),
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Landholding Inequality and Schools Financed by the State Government",
          dep.var.labels = c("Number of State Schools (log)"),
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = c("Constant"),
          add.lines = list(c("State fixed effects","Yes", "Yes", "Yes"),
                           c("Additional covariates","No", "Yes", "Yes"),
                           c("Geographic variables","No", "No", "Yes")))

rm(sch0, sch1, sch2, cl.sch0, cl.sch1, cl.sch2)

# Appendix B

# Table B.1: Past Levels of per Capita Taxation and Contemporary Local State Capacity

dtc <- read.csv("brazil-contemporary-outcomes.csv")

lm.iptu <- lm(log(IPTU.00/pop.2000+1) ~ log.receita.capita1920 + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
              + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao
              + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.iptu <- coeftest(lm.iptu, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

lm.tottax <- lm(log(totaltax.00/pop.2000) ~ log.receita.capita1920 + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
                + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao
                + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.tottax <- coeftest(lm.tottax, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

lm.cap <- lm(cg ~ log.receita.capita1920 + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
             + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao
             + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.cap <- coeftest(lm.cap, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

lm.instr <- lm(instrumentos ~ log.receita.capita1920 + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
               + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao
               + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.instr <- coeftest(lm.instr, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

lm.plan <- lm(planejamento ~ log.receita.capita1920 + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
              + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao 
              + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.plan <- coeftest(lm.plan, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

stargazer(lm.iptu, lm.tottax, lm.cap, lm.instr, lm.plan,
          se = list(cl.se.iptu, cl.se.tottax, cl.se.cap, cl.se.instr, cl.se.plan), 
          keep = "log.receita.capita1920",
          covariate.labels = "Tax revenue per capita (log), 1923",
          dep.var.labels = c("Property taxes per capita (log)", "Tax revenue per capita (log)",
                             "Municipal Capacity Index", "Administrative Instruments", "Planning Instruments"), 
          title = "Past Levels of per Capita Taxation and Contemporary Local Capacity",
          omit.stat = c("f", "ser", "adj.rsq"), omit = c("Constant"))

# Table B.2: Past Bureaucracy Size and Contemporary Local State Capacity

lm.iptu2 <- lm(log(IPTU.00/pop.2000+1) ~ log(admpub.municipal1920 +1) + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
               + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao
               + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.iptu2 <- coeftest(lm.iptu2, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

lm.tottax2 <- lm(log(totaltax.00/pop.2000) ~ log(admpub.municipal1920 +1) + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
                 + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao
                 + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.tottax2 <- coeftest(lm.tottax2, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

lm.cap2 <- lm(cg ~ log(admpub.municipal1920 +1) + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
              + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao 
              + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.cap2 <- coeftest(lm.cap2, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

lm.instr2 <- lm(instrumentos ~ log(admpub.municipal1920 +1) + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
                + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao
                + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.instr2 <- coeftest(lm.instr2, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

lm.plan2 <- lm(planejamento ~ log(admpub.municipal1920 +1) + log(munarea1997.km2) + productivity + log(pib.pc.1920) 
               + proprietarios1920 + log(pop1920) + value.land.acre1920 + pc.pib.ind1920 + ano_fundacao 
               + latitude  + longitude + I(longitude^2) + I(latitude^2) + distancia_mar_km + as.factor(UF), data = dtc) 
cl.se.plan2 <- coeftest(lm.plan2, vcov=function(x) cluster.vcov(x, dtc$UF))[,2]

stargazer(lm.iptu2, lm.tottax2, lm.cap2, lm.instr2, lm.plan2,
          se = list(cl.se.iptu2, cl.se.tottax2, cl.se.cap2, cl.se.instr2, cl.se.plan2), 
          keep = "admpub.municipal1920",
          covariate.labels = "Size of municipal bureaucracy (log), 1920",
          dep.var.labels = c("Property taxes per capita (log)", "Tax revenue per capita (log)",
                             "Municipal Capacity Index", "Administrative Instruments", "Planning Instruments"), 
          title = "Past Municipal Bureaucracy Size and Contemporary Local Capacity",
          omit.stat = c("f", "ser", "adj.rsq"), omit = c("Constant"))

rm(dtc, lm.iptu, lm.tottax, lm.cap, lm.instr, lm.plan, cl.se.iptu, cl.se.tottax, cl.se.cap, cl.se.instr, cl.se.plan)
rm(lm.iptu2, lm.tottax2, lm.cap2, lm.instr2, lm.plan2, cl.se.iptu2, cl.se.tottax2, cl.se.cap2, cl.se.instr2, cl.se.plan2)

# Appendix C

# Table C.1: Potential Omitted Variables: Colonial Cycles, Slavery, and Immigration

lm.pst <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) 
                   + year.foundation + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
                   + longitude + latitude + I(latitude^2) + I(longitude^2)
                   + dist.coast + dist.capital + as.factor(UF1920),data = data)
cl.pst <- coeftest(lm.pst, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

lm.cycle <- lm(log(capita.revenues) ~ land.gini + ciclo_acucar + ciclo_ouro + productivity + log(gdp.pc) 
                + year.foundation + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
                + longitude + latitude + I(latitude^2) + I(longitude^2) 
                + dist.coast + dist.capital + as.factor(UF1920), data = data)
cl.cycle <- coeftest(lm.cycle, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

lm.slv <- lm(log(capita.revenues) ~ land.gini + pc.enslaved + productivity + log(gdp.pc) 
              + year.foundation + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
              + longitude + latitude + I(latitude^2) + I(longitude^2)
              + dist.coast + dist.capital + as.factor(UF1920), data = data)
cl.slv <- coeftest(lm.slv, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

lm.imm <- lm(log(capita.revenues) ~ land.gini + sh.foreign.farms + productivity + log(gdp.pc)
              + year.foundation + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
              + longitude + latitude + I(latitude^2) + I(longitude^2)
              + dist.coast + dist.capital + as.factor(UF1920), data = data)
cl.imm <- coeftest(lm.imm, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

lm.all <- lm(log(capita.revenues) ~ land.gini 
             + ciclo_acucar + ciclo_ouro + pc.enslaved + sh.foreign.farms + productivity + log(gdp.pc) + year.foundation
             + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind  
             + longitude + latitude + I(latitude^2) + I(longitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data = data)
cl.all <- coeftest(lm.all, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(lm.pst, lm.cycle, lm.slv, lm.imm, lm.all,
          se=list(cl.pst, cl.cycle, cl.slv, cl.imm, cl.all),
          title = "Potential Omitted Variables: Colonial Cycles, Slavery, and Immigration",
          dep.var.labels = "Tax revenue per capita (log)",
          keep = "land.gini",
          covariate.labels = "Land Gini",
          omit.stat = c("adj.rsq","ll", "f", "ser"),
          add.lines = list(c("Additional covariates", "No", "No", "No", "Yes", "Yes"),
                           c("State fixed effects", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Sugar and gold cycles", "", "Yes", "", "", "Yes"), 
                           c("Enslaved population (%), 1872", "", "", "Yes", "", "Yes"), 
                           c("Immigrant owned farms (%), 1920", "", "", "", "Yes", "Yes")))

rm(lm.pst, lm.cycle, lm.slv, lm.imm, lm.all, cl.pst, cl.cycle, cl.slv, cl.imm, cl.all)

# Table C.2: Reverse Causality: Past State Presence and Landholding Inequality

lm.cap0 <- lm(land.gini ~ scale(officials1872.pop), data=data)
cl.cap0 <- coeftest(lm.cap0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

lm.cap1 <- lm(land.gini ~ scale(officials1872.pop) + as.factor(UF1920), data=data)
cl.cap1 <- coeftest(lm.cap1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

lm.cap2 <- lm(land.gini ~ scale(officials1872.pop) + year.foundation + scale(area.mun) 
                   + latitude + longitude + I(longitude^2) + I(latitude^2)
                   + dist.coast + dist.capital, data=data)
cl.cap2 <- coeftest(lm.cap2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

lm.cap3 <- lm(land.gini ~ scale(officials1872.pop) + year.foundation + scale(area.mun)
                   + latitude + longitude + I(longitude^2) + I(latitude^2)
                   + dist.coast + dist.capital + as.factor(UF1920), data=data)
cl.cap3 <- coeftest(lm.cap3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(lm.cap0, lm.cap1, lm.cap2, lm.cap3,
          se=list(cl.cap0, cl.cap1, cl.cap2, cl.cap3),
          keep = "officials1872.pop",
          covariate.labels = "State Officials (% Population), 1872",
          title = "Reverse Causality: Past State Presence and Landholing Inequality",
          dep.var.labels = "Land Gini, 1920",
          omit.stat = c("adj.rsq","ll", "f", "ser"),
          add.lines = list(c("Additional covariates", "No", "No", "Yes", "Yes"),
                           c("Geographic characteristics", "No", "No", "Yes", "Yes"),
                           c("State fixed effects", "No", "Yes", "No", "Yes")))

rm(lm.cap0, lm.cap1, lm.cap2, lm.cap3, cl.cap0, cl.cap1, cl.cap2, cl.cap3)

# Table C.3: Matching Neighboring Localities Within Each State

municipal1920 <- readOGR(dsn = "~/map", layer = "mun1920")

mun.names <- tibble(id = row.names(municipal1920@data),
                    mun_code = as.character(municipal1920@data$id_code),
                    neighbor_code = mun_code)

neighbor.list <- poly2nb(municipal1920) 
neighbor.matrix <- nb2mat(neighbor.list, style="B", zero.policy=TRUE)
colnames(neighbor.matrix) <- rownames(neighbor.matrix)

mun.names.mg1 <- mun.names[,c("id", "mun_code")]
mun.names.mg2 <- mun.names[,c("id", "neighbor_code")]

all.neighbors <- as.data.frame(neighbor.matrix) %>%
  mutate(municipality = row.names(.)) %>%  
  gather(neighbor, present, -municipality) %>%  
  filter(present == 1) %>%  
  left_join(mun.names.mg1, by=c("municipality" = "id")) %>%
  left_join(mun.names.mg2, by=c("neighbor" = "id")) %>%
  dplyr::select(contains("code")) 

all.neighbors$mun.code <- as.numeric(all.neighbors$mun_code)
all.neighbors$neighbor.code <- as.numeric(all.neighbors$neighbor_code)

# Land Gini of the municipality 
nmatch <-  left_join(all.neighbors, dplyr::select(data, land.gini, mun.code), by="mun.code") 
nmatch$land.gini.own <- nmatch$land.gini
nmatch$land.gini <- NULL

# Neighbors' Land Gini
nmatch <- left_join(nmatch, dplyr::select(data, land.gini, mun.code), by=c("neighbor.code" = "mun.code")) 
nmatch$land.gini.n <- nmatch$land.gini
nmatch$land.gini <- NULL

# Neighbors whose land Gini is at least 20% higher or more than 20% lower
nmatch$sample20 <- ifelse((1.2*nmatch$land.gini.own < nmatch$land.gini.n | .8*nmatch$land.gini.own > nmatch$land.gini.n), 1, 0)
sample20 <- unique(nmatch$mun.code[which(nmatch$sample20 == 1)])

# Neighbors whose land Gini is at least 10% higher or more than 10% lower
nmatch$sample10 <- ifelse((1.1*nmatch$land.gini.own < nmatch$land.gini.n | .9*nmatch$land.gini.own > nmatch$land.gini.n), 1, 0)
sample10 <- unique(nmatch$mun.code[which(nmatch$sample10 == 1)])

# Neighbors whose land Gini is at least 30% higher or more than 30% lower
nmatch$sample30 <- ifelse((1.3*nmatch$land.gini.own < nmatch$land.gini.n | .7*nmatch$land.gini.own > nmatch$land.gini.n), 1, 0)
sample30 <- unique(nmatch$mun.code[which(nmatch$sample30 == 1)])

# Samples
data$nmatch.20 <- ifelse(data$mun.code %in% sample20, 1, 0)
data$nmatch.10 <- ifelse(data$mun.code %in% sample10, 1, 0)
data$nmatch.30 <- ifelse(data$mun.code %in% sample30, 1, 0)

# Panel A: Neighboring municipalities where landholding inequality differs by more than 20 percent

nm1 <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) + year.foundation
          + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
          + latitude + longitude + I(longitude^2) + I(latitude^2)
          + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.20 == 1)
cl.nm1 <- coeftest(nm1, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.20 == 1), "UF1920"]))[,2]

nm2 <- lm(log(revenues.gdp) ~ land.gini + productivity + log(pop.1920) + year.foundation
          + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
          + latitude + longitude + I(longitude^2) + I(latitude^2)
          + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.20 == 1)
cl.nm2 <- coeftest(nm2, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.20 == 1), "UF1920"]))[,2]

nm3 <- lm(log(rev.minus.export) ~ land.gini + productivity + log(gdp.pc) + year.foundation
          + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
          + latitude + longitude + I(longitude^2) + I(latitude^2)
          + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.20 == 1)
cl.nm3 <- coeftest(nm3, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.20 == 1), "UF1920"]))[,2]

nm4 <- lm(log(admpub.municipal+1) ~ land.gini + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
          + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
          + latitude + longitude + I(longitude^2) + I(latitude^2)
          + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.20 == 1)
cl.nm4 <- coeftest(nm4, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.20 == 1), "UF1920"]))[,2]

stargazer(nm1, nm2, nm3, nm4,
          se=list(cl.nm1, cl.nm2, cl.nm3, cl.nm4), 
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Panel A: Matching Neighboring Localities Within Each State",
          dep.var.labels = c("Tax revenue per capita (log)", "Tax revenue/Output (log)",
                             "Tax revenue excl. export taxes (log)", "Size of local bureaucracy (log)"),
          column.sep.width = "15pt", 
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("Additional covariates", "Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects", "Yes", "Yes", "Yes", "Yes")))

rm(nm1, nm2, nm3, nm4, cl.nm1, cl.nm2, cl.nm3, cl.nm4)

# # Panel B: Neighboring municipalities where landholding inequality differs by more than 10 percent

nm1.10 <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.10 == 1)
cl.nm1.10 <- coeftest(nm1.10, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.10 == 1), "UF1920"]))[,2]

nm2.10 <- lm(log(revenues.gdp) ~ land.gini + productivity + log(pop.1920) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.10 == 1)
cl.nm2.10 <- coeftest(nm2.10, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.10 == 1), "UF1920"]))[,2]

nm3.10 <- lm(log(rev.minus.export) ~ land.gini + productivity + log(gdp.pc) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.10 == 1)
cl.nm3.10 <- coeftest(nm3.10, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.10 == 1), "UF1920"]))[,2]

nm4.10 <- lm(log(admpub.municipal+1) ~ land.gini + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.10 == 1)
cl.nm4.10 <- coeftest(nm4.10, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.10 == 1), "UF1920"]))[,2]

stargazer(nm1.10, nm2.10, nm3.10, nm4.10,
          se=list(cl.nm1.10, cl.nm2.10, cl.nm3.10, cl.nm4.10), 
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Panel B: Matching Neighboring Localities Within Each State",
          dep.var.labels = c("Tax revenue per capita (log)", "Tax revenue/Output (log)",
                             "Tax revenue excl. export taxes (log)", "Size of local bureaucracy (log)"),
          column.sep.width = "15pt", 
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("Additional covariates", "Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects", "Yes", "Yes", "Yes", "Yes")))

rm(nm1.10, nm2.10, nm3.10, nm4.10, cl.nm1.10, cl.nm2.10, cl.nm3.10, cl.nm4.10)

# Panel C: Neighboring municipalities where landholding inequality differs by more than 30 percent

nm1.30 <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.30 == 1)
cl.nm1.30 <- coeftest(nm1.30, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.30 == 1), "UF1920"]))[,2]

nm2.30 <- lm(log(revenues.gdp) ~ land.gini + productivity + log(pop.1920) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.30 == 1)
cl.nm2.30 <- coeftest(nm2.30, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.30 == 1), "UF1920"]))[,2]

nm3.30 <- lm(log(rev.minus.export) ~ land.gini + productivity + log(gdp.pc) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.30 == 1)
cl.nm3.30 <- coeftest(nm3.30, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.30 == 1), "UF1920"]))[,2]

nm4.30 <- lm(log(admpub.municipal+1) ~ land.gini + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital + as.factor(UF1920), data=data, subset = nmatch.30 == 1)
cl.nm4.30 <- coeftest(nm4.30, vcov=function(x) cluster.vcov(x, data[which(data$nmatch.30 == 1), "UF1920"]))[,2]

stargazer(nm1.30, nm2.30, nm3.30, nm4.30,
          se=list(cl.nm1.30, cl.nm2.30, cl.nm3.30, cl.nm4.30), 
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Panel C: Matching Neighboring Localities Within Each State",
          dep.var.labels = c("Tax revenue per capita (log)", "Tax revenue/Output (log)",
                             "Tax revenue excl. export taxes (log)", "Size of local bureaucracy (log)"),
          column.sep.width = "15pt", 
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("Additional covariates", "Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects", "Yes", "Yes", "Yes", "Yes")))

rm(nm1.30, nm2.30, nm3.30, nm4.30, cl.nm1.30, cl.nm2.30, cl.nm3.30, cl.nm4.30)
rm(municipal1920, all.neighbors, mun.names, mun.names.mg1, mun.names.mg2, neighbor.list, neighbor.matrix, nmatch, sample10, sample20, sample30)

# Table C.4: Instrumental Variables Estimates of the Effect of Land Inequality

first.geo <- lm(land.gini ~ terrain.type + temp + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                + as.factor(UF1920), data=data)
cl.first.geo <- coeftest(first.geo, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

iv.geo1 <- ivreg(log(capita.revenues) ~ land.gini 
                 + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                 + as.factor(UF1920) | terrain.type + temp
                 + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                 + as.factor(UF1920), data=data)
cl.iv.geo1 <- coeftest(iv.geo1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

iv.geo2 <- ivreg(log(revenues.gdp) ~ land.gini 
                 + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                 + as.factor(UF1920) | terrain.type + temp
                 + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                 + as.factor(UF1920) , data=data)
cl.iv.geo2 <- coeftest(iv.geo2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

iv.geo3 <- ivreg(log(rev.minus.export) ~ land.gini 
                 + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                 + as.factor(UF1920) | terrain.type + temp
                 + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                 + as.factor(UF1920) , data=data)
cl.iv.geo3 <- coeftest(iv.geo3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

iv.geo4 <- ivreg(log(admpub.municipal+1) ~ land.gini 
                 + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                 + as.factor(UF1920) | terrain.type + temp
                 + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2)
                 + as.factor(UF1920) , data=data)
cl.iv.geo4 <- coeftest(iv.geo4, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(first.geo, iv.geo1, iv.geo2, iv.geo3, iv.geo4, 
          se=list(cl.first.geo, cl.iv.geo1, cl.iv.geo2, cl.iv.geo3, cl.iv.geo4), 
          keep = c("land.gini", "terrain.type", "temp"),
          covariate.labels = c("Type of terrain", "Temperature (annual average)", "Land Gini"),
          title = "Instrumental Variables Estimates of the Effect of Land Inequality",
          dep.var.labels = c("Land Gini", "Tax revenue per capita" , "Tax Revenue/Output", "Revenue w/o export taxes", "Number of public officials (log)"),
          omit.stat = c("rsq", "adj.rsq","ll", "F", "ser"), omit = "Constant", 
          add.lines = list(c("Geographic controls", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Model", "2SLS", "2SLS", "2SLS", "2SLS", "2SLS"),
                           c("", "First stage", "Second stage", "Second stage", "Second stage", "Second stage"),
                           c("F-statistic", "23.816*** (df = 26; 1219)", "", "", "", "")))

rm(first.geo, iv.geo1, iv.geo2, iv.geo3, iv.geo4, cl.first.geo, cl.iv.geo1, cl.iv.geo2, cl.iv.geo3, cl.iv.geo4)

# Table C.5: Exclusion Restriction: Geography and Crop Choice 

tub.ins0 <- lm(scale(cer.tub) ~ terrain.type + temp, data = data)
cl.tub.ins0 <- coeftest(tub.ins0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

tub.ins1 <- lm(scale(cer.tub) ~ terrain.type + temp + scale(area.mun) + latitude + longitude + I(latitude^2) + I(longitude^2) + as.factor(UF1920), data = data)
cl.tub.ins1 <- coeftest(tub.ins1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

tub0 <- lm(scale(cer.tub) ~ land.gini, data = data)
cl.tub0 <- coeftest(tub0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

tub1 <- lm(scale(cer.tub) ~ land.gini + log(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2) + as.factor(UF1920), data = data)
cl.tub1 <- coeftest(tub1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(tub.ins0, tub.ins1, tub0, tub1,
          se=list(cl.tub.ins0, cl.tub.ins1, cl.tub0, cl.tub1), 
          keep = c("terrain.type", "temp", "land.gini"),
          covariate.labels = c("Type of terrain", "Temperature", "Land Gini"),
          title = "Exclusion Restriction: Geography and Crop Choice",
          dep.var.labels = "Cereals minus Tubers Production (tons)",
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("State Fixed Effects", "No", "Yes", "No", "Yes"),
                           c("Geographic covariates", "No", "Yes", "No", "Yes")))

rm(tub.ins0, tub.ins1, tub0, tub1, cl.tub.ins0, cl.tub.ins1, cl.tub0, cl.tub1)

# Table C.6: Exclusion Restriction: Geography, Coffee, Sugar, and Cattle Production

coff.ins0 <- lm(log(prod.coffee+1) ~ terrain.type + temp, data = data)
cl.coff.ins0 <- coeftest(coff.ins0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

coff.ins1 <- lm(log(prod.coffee+1) ~ terrain.type + temp + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2) + as.factor(UF1920), data = data)
cl.coff.ins1 <- coeftest(coff.ins1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

bov.ins0 <- lm(log(prod.cattle+1) ~ terrain.type + temp, data = data)
cl.bov.ins0 <-coeftest(bov.ins0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

bov.ins1 <- lm(log(prod.cattle+1) ~ terrain.type + temp + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2) + as.factor(UF1920), data = data)
cl.bov.ins1 <-coeftest(bov.ins1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sugar.ins0 <- lm(log(prod.sugar+1) ~ terrain.type + temp, data = data)
cl.sugar.ins0 <- coeftest(sugar.ins0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sugar.ins1 <- lm(log(prod.sugar+1) ~ terrain.type + temp + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2) + as.factor(UF1920), data = data)
cl.sugar.ins1 <-coeftest(sugar.ins1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(coff.ins0, coff.ins1, sugar.ins0, sugar.ins1, bov.ins0, bov.ins1,
          se = list(cl.coff.ins0, cl.coff.ins1, cl.sugar.ins0, cl.sugar.ins1, cl.bov.ins0, cl.bov.ins1), 
          keep = c("terrain.type", "temp"),
          covariate.labels = c("Type of terrain", "Temperature"),
          dep.var.labels = c("Coffee", "Sugar", "Cattle"),
          title = "Exclusion Restriction: Geography, Coffee, Sugar, and Cattle Production",
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("State Fixed Effects", "No", "Yes", "No", "Yes", "No", "Yes"),
                           c("Geographic covariates", "No", "Yes", "No", "Yes", "No", "Yes")))

rm(coff.ins0, coff.ins1, sugar.ins0, sugar.ins1, bov.ins0, bov.ins1, cl.coff.ins0, cl.coff.ins1, cl.sugar.ins0, cl.sugar.ins1, cl.bov.ins0, cl.bov.ins1)

# Table C.7: Land Inequality, Coffee, Sugar, and Cattle Production

coff0 <- lm(log(prod.coffee+1) ~ land.gini, data = data)
cl.coff0 <- coeftest(coff0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

coff1 <- lm(log(prod.coffee+1) ~ land.gini + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2) + as.factor(UF1920), data = data)
cl.coff1 <- coeftest(coff1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

bov0 <- lm(log(prod.cattle+1) ~ land.gini, data = data)
cl.bov0 <- coeftest(bov0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

bov1 <- lm(log(prod.cattle+1) ~ land.gini + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2) + as.factor(UF1920), data = data)
cl.bov1 <- coeftest(bov1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sugar0 <- lm(log(prod.sugar+1) ~ land.gini, data = data)
cl.sugar0 <- coeftest(sugar0, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

sugar1 <- lm(log(prod.sugar+1) ~ land.gini + scale(area.mun) + latitude + I(latitude^2) + longitude + I(longitude^2) + as.factor(UF1920), data = data)
cl.sugar1 <- coeftest(sugar1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(coff0, coff1, sugar0, sugar1, bov0, bov1,
          se=list(cl.coff0, cl.coff1, cl.sugar0, cl.sugar1, cl.bov0, cl.bov1), 
          keep = "land.gini",
          covariate.labels = "Land Gini",
          dep.var.labels = c("Coffee", "Sugar", "Cattle"),
          title = "Land Inequality, Coffee, Sugar, and Cattle Production",
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("State Fixed Effects", "No", "Yes", "No", "Yes", "No", "Yes"),
                           c("Geographic covariates", "No", "Yes", "No", "Yes", "No", "Yes")))

rm(coff0, coff1, sugar0, sugar1, bov0, bov1, cl.coff0, cl.coff1, cl.sugar0, cl.sugar1, cl.bov0, cl.bov1)

# Appendix D

# Table D.1: Local Fiscal Capacity and State Transfers to Municipalities, São Paulo (1918)

fit.sp1 <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(latitude^2) + I(longitude^2)
             + dist.capital + dist.coast, data = data[which(data$UF1920 == 35),])

fit.sp2 <- lm(log(capita.revenues) ~ log(gov.transfers/pop.1920+1) + productivity + log(gdp.pc) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(latitude^2) + I(longitude^2)
             + dist.capital + dist.coast, data = data[which(data$UF1920 == 35),])

fit.sp3 <- lm(log(capita.revenues) ~ land.gini + log(gov.transfers/pop.1920+1) + productivity + log(gdp.pc) + year.foundation
             + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
             + latitude + longitude + I(latitude^2) + I(longitude^2)
             + dist.capital + dist.coast, data = data[which(data$UF1920 == 35),])

stargazer(fit.sp1, fit.sp2, fit.sp3,
          keep = c("land.gini", "gov.transfers"),
          covariate.labels = c("Land Gini", "State transfers, per capita (log)"),
          title = "Local Fiscal Capacity and State Transfers to Municipalities",
          dep.var.labels = "Tax Revenue per capita (log)",
          omit.stat = c("ll", "f", "ser", "adj.rsq"), omit = "Constant")

rm(fit.sp1, fit.sp2, fit.sp3)

# Table D.2: Local Fiscal Capacity and Net State Spending by Municipality, Minas Gerais (1921)

fit.mg1 <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) + year.foundation 
             + num.landowners + avg.value.hectar + pc.PIB.ind + log(area.mun)
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital, data = data[which(data$UF1920 == 31),])

fit.mg2 <- lm(log(capita.revenues) ~ scale(net.state.spd.pc) + productivity + log(gdp.pc) + year.foundation 
             + num.landowners + avg.value.hectar + pc.PIB.ind + log(area.mun)
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital, data = data[which(data$UF1920 == 31),])

fit.mg3 <- lm(log(capita.revenues) ~ land.gini + scale(net.state.spd.pc) + productivity + log(gdp.pc) + year.foundation 
             + num.landowners + avg.value.hectar + pc.PIB.ind + log(area.mun)
             + latitude + longitude + I(longitude^2) + I(latitude^2)
             + dist.coast + dist.capital, data = data[which(data$UF1920 == 31),])

stargazer(fit.mg1, fit.mg2, fit.mg3,
          keep = c("land.gini", "net.state.spd.pc"), 
          covariate.labels = c("Land Gini", "Net State Spending, per capita"),
          title = "Alternative Explanation: The Role of State-Level Spending",
          dep.var.labels = "Tax Revenue per capita (log)",
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = "Constant", 
          add.lines = list(c("Additional covariates", "Yes", "Yes", "Yes"),
                           c("Geographic characteristics", "Yes", "Yes", "Yes")))

rm(fit.mg1, fit.mg2, fit.mg3)

# Table D.3: Exploring Mechanisms: Crop Mix, Landowners' Social Power, Literacy, and Railroads

# Table D.3 Panel A: Crop Mix

fit.crop1 <- lm(log(capita.revenues) ~ land.gini + productivity + log(gdp.pc) + year.foundation
                + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                + scale(prod.mandioca) + scale(prod.tobacco) + scale(prod.wheat) + scale(prod.cattle) + scale(prod.cacau) + scale(prod.cotton) + scale(prod.sugar) + scale(prod.coffee)
                + latitude + longitude + I(latitude^2) + I(longitude^2) + dist.coast + dist.capital + as.factor(UF1920), data = data)
cl.crop1 <- coeftest(fit.crop1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.crop2 <- lm(log(revenues.gdp) ~ land.gini + productivity + log(pop.1920) + year.foundation
                + num.landowners + avg.value.hectar + log(area.mun)+ pc.PIB.ind 
                + scale(prod.mandioca) + scale(prod.tobacco) + scale(prod.wheat) + scale(prod.cattle) + scale(prod.cacau) + scale(prod.cotton) + scale(prod.sugar) + scale(prod.coffee)
                + latitude + longitude + I(latitude^2) + I(longitude^2) + dist.coast + dist.capital + as.factor(UF1920), data = data)
cl.crop2 <- coeftest(fit.crop2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.crop3 <- lm(log(rev.minus.export) ~ land.gini + productivity + log(gdp.pc) + year.foundation
                + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                + scale(prod.mandioca) + scale(prod.tobacco) + scale(prod.wheat) + scale(prod.cattle) + scale(prod.cacau) + scale(prod.cotton) + scale(prod.sugar) + scale(prod.coffee)
                + latitude + longitude + I(latitude^2) + I(longitude^2) + dist.coast + dist.capital + as.factor(UF1920), data = data)
cl.crop3 <- coeftest(fit.crop3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.crop4 <- lm(log(admpub.municipal+1) ~ land.gini + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
                + num.landowners + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                + scale(prod.mandioca) + scale(prod.tobacco) + scale(prod.wheat) + scale(prod.cattle) + scale(prod.cacau) + scale(prod.cotton) + scale(prod.sugar) + scale(prod.coffee)
                + latitude + longitude + I(latitude^2) + I(longitude^2) + dist.coast + dist.capital + as.factor(UF1920), data = data)
cl.crop4 <- coeftest(fit.crop4, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(fit.crop1, fit.crop2, fit.crop3, fit.crop4,
          se = list(cl.crop1, cl.crop2, cl.crop3, cl.crop4),
          keep = c("land.gini"),
          covariate.labels = "Land Gini",
          title = "Panel A: Crop Mix",
          dep.var.labels = c("Tax revenue per capita (log)", "Tax revenue/Output (log)", "Tax revenue excl. export taxes (log)", "Number of public officials (log)"),
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = c("Constant"),
          add.lines = list(c("Additional covariates","Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects","Yes", "Yes", "Yes", "Yes")) )

rm(fit.crop1, fit.crop2, fit.crop3, fit.crop4, cl.crop1, cl.crop2, cl.crop3, cl.crop4)

# Table D.3 Panel B: Social Control

fit.shrc1 <- lm(log(capita.revenues) ~ land.gini + shrcrp.elect + productivity + log(gdp.pc) + year.foundation
                      + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
                      + latitude + longitude + I(latitude^2) + I(longitude^2)
                      + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.shrc1 <- coeftest(fit.shrc1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.shrc2 <- lm(log(revenues.gdp) ~ land.gini + shrcrp.elect + productivity + log(pop.1920) + year.foundation
                      + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
                      + latitude + longitude + I(latitude^2) + I(longitude^2)
                      + dist.capital + dist.coast + as.factor(UF1920), data=data)
cl.shrc2 <- coeftest(fit.shrc2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.shrc3 <- lm(log(rev.minus.export) ~ land.gini + shrcrp.elect + productivity + log(gdp.pc) + year.foundation
                      + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
                      + latitude + longitude + I(latitude^2) + I(longitude^2)
                      + dist.capital + dist.coast + as.factor(UF1920), data=data)
cl.shrc3 <- coeftest(fit.shrc3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.shrc4 <- lm(log(admpub.municipal+1) ~ land.gini + shrcrp.elect + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
                      + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
                      + latitude + longitude + I(latitude^2) + I(longitude^2)
                      + dist.capital + dist.coast + as.factor(UF1920), data=data)
cl.shrc4 <- coeftest(fit.shrc4, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(fit.shrc1, fit.shrc2, fit.shrc3, fit.shrc4,
          se = list(cl.shrc1, cl.shrc2, cl.shrc3, cl.shrc4), 
          keep = c("land.gini", "shrcrp.elect"),
          covariate.labels = c("Land Gini", "Sharecroppers (% electorate)"),
          title = "Sharecroppers as a share of the electorate",
          dep.var.labels = c("Tax revenue per capita (log)", "Tax revenue/Output (log)", "Tax revenue excl. export taxes (log)", "Number of public officials (log)"),
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = c("Constant"),
          add.lines = list(c("Additional covariates","Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects","Yes", "Yes", "Yes", "Yes")) )

rm(fit.shrc1, fit.shrc2, fit.shrc3, fit.shrc4, cl.shrc1, cl.shrc2, cl.shrc3, cl.shrc4)

# Table D.3 Panel C: Railroads

fit.rr1 <- lm(log(capita.revenues) ~ land.gini + num.railroads + productivity + log(gdp.pc) + year.foundation
              + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
              + latitude + longitude + I(longitude^2) + I(latitude^2)
              + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.rr1 <- coeftest(fit.rr1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.rr2 <- lm(log(revenues.gdp) ~ land.gini + num.railroads + productivity + log(pop.1920) + year.foundation
              + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
              + latitude + longitude + I(longitude^2) + I(latitude^2)
              + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.rr2 <- coeftest(fit.rr2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.rr3 <- lm(log(rev.minus.export) ~ land.gini + num.railroads + productivity + log(gdp.pc) + year.foundation
              + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
              + latitude + longitude + I(longitude^2) + I(latitude^2)
              + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.rr3 <- coeftest(fit.rr3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.rr4 <- lm(log(admpub.municipal+1) ~ land.gini + num.railroads + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
              + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
              + latitude + longitude + I(longitude^2) + I(latitude^2)
              + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.rr4 <- coeftest(fit.rr4, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(fit.rr1, fit.rr2, fit.rr3, fit.rr4,
          se = list(cl.rr1, cl.rr2, cl.rr3, cl.rr4), 
          keep = c("land.gini", "num.railroads"),
          covariate.labels = c("Land Gini", "Number of railroads"),
          title = "Landholding Inequality versus Railroads",
          dep.var.labels = c("Tax revenue per capita (log)", "Tax revenue/Output (log)", "Tax revenue excl. export taxes (log)", "Number of public officials (log)"),
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = c("Constant"),
          add.lines = list(c("Additional covariates","Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects","Yes", "Yes", "Yes", "Yes")) )

rm(fit.rr1, fit.rr2, fit.rr3, fit.rr4, cl.rr1, cl.rr2, cl.rr3, cl.rr4)

# Table D.3 Panel D: Literacy 

fit.lit1 <- lm(log(capita.revenues) ~ land.gini + pc.literate + productivity + log(gdp.pc) + year.foundation
               + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
               + latitude + longitude + I(longitude^2) + I(latitude^2)
               + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.lit1 <- coeftest(fit.lit1, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.lit2 <- lm(log(revenues.gdp) ~ land.gini + pc.literate + productivity + log(pop.1920) + year.foundation
               + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
               + latitude + longitude + I(longitude^2) + I(latitude^2)
               + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.lit2 <- coeftest(fit.lit2, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.lit3 <- lm(log(rev.minus.export) ~ land.gini + pc.literate + productivity + log(gdp.pc) + year.foundation
               + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
               + latitude + longitude + I(longitude^2) + I(latitude^2)
               + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.lit3 <- coeftest(fit.lit3, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

fit.lit4 <- lm(log(admpub.municipal+1) ~ land.gini + pc.literate + productivity + log(gdp.pc) + log(pop.1920) + year.foundation
               + num.landowners + avg.value.hectar + scale(area.mun) + pc.PIB.ind 
               + latitude + longitude + I(longitude^2) + I(latitude^2)
               + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.lit4 <- coeftest(fit.lit4, vcov=function(x) cluster.vcov(x, data$UF1920))[,2]

stargazer(fit.lit1, fit.lit2, fit.lit3, fit.lit4,
          se = list(cl.lit1, cl.lit2, cl.lit3, cl.lit4), 
          keep = c("land.gini", "pc.literate"),
          covariate.labels = c("Land Gini", "Literacy rate (%)"),
          title = "Exploring Mechansism: Literacy",
          dep.var.labels = c("Tax revenue per capita (log)", "Tax revenue/Output (log)", "Tax revenue excl. export taxes (log)", "Number of public officials (log)"),
          omit.stat = c("adj.rsq","ll", "f", "ser"), omit = c("Constant"),
          add.lines = list(c("Additional covariates","Yes", "Yes", "Yes", "Yes"),
                           c("State fixed effects","Yes", "Yes", "Yes", "Yes")) )

rm(fit.lit1, fit.lit2, fit.lit3, fit.lit4, cl.lit1, cl.lit2, cl.lit3, cl.lit4)

# Appendix E

# Table E.1: Alternative Explanations: Crop Specialization, Export Orientation, Strength of the
# State-Level Apparatus, Slavery, Immigration, Industrialization, and Development

fit.UF.inst <- lm(log(capita.revenues) ~ land.gini*opposition.sh 
                  + land.gini*incumb.dur
                  + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                  + latitude + longitude + I(latitude^2) + I(longitude^2)
                  + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.UF.inst <- coeftest(fit.UF.inst, vcov=function(x) cluster.vcov(x, data$UF1920))

fit.UF.crops <- lm(log(capita.revenues) ~ land.gini*opposition.sh
                  + land.gini*leather.exp.sh + land.gini*coffee.exp.sh + land.gini*cotton.exp.sh 
                  + land.gini*sugar.exp.sh + land.gini*rubber.exp.sh + land.gini*tobacco.exp.sh
                  + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                  + latitude + longitude + I(latitude^2) + I(longitude^2)
                  + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.crops <- coeftest(fit.UF.crops, vcov=function(x) cluster.vcov(x, data$UF1920))

fit.UF.export <- lm(log(capita.revenues) ~ land.gini*opposition.sh 
                    + land.gini*exp.tax.rev.UF
                    + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                    + latitude + longitude + I(latitude^2) + I(longitude^2)
                    + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.exp.UF <- coeftest(fit.UF.export, vcov=function(x) cluster.vcov(x, data$UF1920))

fit.UF.export.value <- lm(log(capita.revenues) ~ land.gini*opposition.sh
                          + land.gini*log(tot.exp.tx.capita.UF+1)
                          + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                          + latitude + longitude + I(latitude^2) + I(longitude^2)
                          + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.value.exp <- coeftest(fit.UF.export.value, vcov=function(x) cluster.vcov(x, data$UF1920))

fit.UF.rev.capita <- lm(log(capita.revenues) ~ land.gini*opposition.sh
                        + land.gini*log(tot.rev.capita.UF+1)
                        + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                        + latitude + longitude + I(latitude^2) + I(longitude^2)
                        + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.rev.cap <- coeftest(fit.UF.rev.capita, vcov=function(x) cluster.vcov(x, data$UF1920))

stargazer(fit.UF.inst, fit.UF.crops, fit.UF.export, fit.UF.export.value, fit.UF.rev.capita, 
          se = list(cl.UF.inst[,2], cl.crops[,2], cl.exp.UF[,2], cl.value.exp[,2], cl.rev.cap[,2]),
          p = list(cl.UF.inst[,4], cl.crops[,4], cl.exp.UF[,4], cl.value.exp[,4], cl.rev.cap[,4]),
          omit = c("Constant", "incumb.dur", "leather.exp.sh", "coffee.exp.sh", "cotton.exp.sh", 
                   "sugar.exp.sh", "rubber.exp.sh", "tobacco.exp.sh", "exp.tax.rev.UF", "tot.exp.tx.capita.UF", "tot.rev.capita.UF",
                   "productivity", "UF1920", "latitude", "longitude", "dist.capital", "dist.coast", "gdp.pc", "year.foundation", "avg.value.hectar", "area.mun", "pc.PIB.ind"),
          covariate.labels = c("Land Gini", "Political Threat", "Land Gini x Political Threat"),
          dep.var.labels = "Tax revenue per capita (log)",
          title = "Alternative Explanations: Panel A",
          omit.stat = c("f", "ser", "adj.rsq"))

rm(fit.UF.inst, fit.UF.crops, fit.UF.export, fit.UF.export.value, fit.UF.rev.capita, cl.UF.inst, cl.crops, cl.exp.UF, cl.value.exp, cl.rev.cap)

# Table E.1 Panel B

fit.UF.slv <- lm(log(capita.revenues) ~ land.gini*opposition.sh
                 + land.gini*pc.enslaved.UF
                 + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                 + latitude + longitude + I(latitude^2) + I(longitude^2)
                 + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.slv  <- coeftest(fit.UF.slv, vcov=function(x) cluster.vcov(x, data$UF1920))

fit.UF.imm <- lm(log(capita.revenues) ~ land.gini*opposition.sh
                 + land.gini*sh.foreign.farms.UF
                 + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                 + latitude + longitude + I(latitude^2) + I(longitude^2)
                 + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.imm <- coeftest(fit.UF.imm, vcov=function(x) cluster.vcov(x, data$UF1920))

fit.UF.ind <- lm(log(capita.revenues) ~ land.gini*opposition.sh
                 + land.gini*pc.pib.ind.UF 
                 + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                 + latitude + longitude + I(latitude^2) + I(longitude^2)
                 + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.ind <- coeftest(fit.UF.ind, vcov=function(x) cluster.vcov(x, data$UF1920))

fit.UF.pib <- lm(log(capita.revenues) ~ land.gini*opposition.sh
                 + land.gini*log(pib.capita.UF)
                 + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                 + latitude + longitude + I(latitude^2) + I(longitude^2)
                 + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.pib <- coeftest(fit.UF.pib, vcov=function(x) cluster.vcov(x, data$UF1920))

fit.UF.admpub <- lm(log(capita.revenues) ~ land.gini*opposition.sh
                    + land.gini*admpub.estadual.capita.UF 
                    + productivity + log(gdp.pc) + year.foundation + avg.value.hectar + log(area.mun) + pc.PIB.ind 
                    + latitude + longitude + I(latitude^2) + I(longitude^2)
                    + dist.capital + dist.coast + as.factor(UF1920), data = data)
cl.adm.UF <- coeftest(fit.UF.admpub, vcov=function(x) cluster.vcov(x, data$UF1920))

stargazer(fit.UF.slv, fit.UF.imm, fit.UF.ind, fit.UF.pib, fit.UF.admpub,
          se = list(cl.slv[,2], cl.imm[,2], cl.ind[,2], cl.pib[,2], cl.adm.UF[,2]),
          p = list(cl.slv[,4], cl.imm[,4], cl.ind[,4], cl.pib[,4], cl.adm.UF[,4]),
          omit = c("Constant", "pc.enslaved.UF", "sh.foreign.farms.UF", "pc.pib.ind.UF", "pc.pib.ind.UF", "pib.capita.UF", "admpub.estadual.capita.UF",
                   "productivity", "UF1920", "latitude", "longitude", "dist.capital", "dist.coast", "gdp.pc", "year.foundation", "avg.value.hectar", "area.mun", "pc.PIB.ind"),
          covariate.labels = c("Land Gini", "Political Threat", "Land Gini x Political Threat"),
          dep.var.labels = "Tax revenue per capita (log)",
          title = "Alternative Explanations: Panel B",
          omit.stat = c("f", "ser", "adj.rsq"))

rm(fit.UF.slv, fit.UF.imm, fit.UF.ind, fit.UF.pib, fit.UF.admpub, cl.slv, cl.imm, cl.ind, cl.pib, cl.adm.UF)

# Appendix F

# Table F.1: The Moderating Role of Political Uncertainty

data_us <- read.csv("us-data.csv")

# Interaction with Southern Indicator
fit.us.s <- lm(ltaxc_pcy ~  giniy*south + vfmagr_pfy + log(county_area) + ubp_sharey  + ngp_sharey + pslave1860 
               + longitude + latitude + I(longitude^2) + I(latitude^2) + as.factor(state_fips), data = data_us) 
cl.us.s <- coeftest(fit.us.s, vcov=function(x) cluster.vcov(x, data_us$state_fips)) 

# Interaction with Democrat Seat Share
fit.us.dem <- lm(ltaxc_pcy ~  giniy*dem_seat_share_house + south + vfmagr_pfy + log(county_area) + ubp_sharey + ngp_sharey + pslave1860 
                 + longitude + latitude + I(longitude^2) + I(latitude^2) + as.factor(state_fips), data = data_us) 
cl.us.dem <- coeftest(fit.us.dem, vcov=function(x) cluster.vcov(x, data_us$state_fips))

# Interaction with Disenfranchisement Indicator
fit.us.dis <- lm(ltaxc_pcy ~  giniy*disenfranchise + south + vfmagr_pfy  + log(county_area) + ubp_sharey + ngp_sharey + pslave1860 
                 + longitude + latitude + I(longitude^2) + I(latitude^2) + as.factor(state_fips), data = data_us)
cl.us.dis <- coeftest(fit.us.dis, vcov=function(x) cluster.vcov(x, data_us$state_fips)) 

stargazer(fit.us.s, fit.us.dem, fit.us.dis,
          keep = c("giniy", "south", "dem_seat_share_house", "disenfranchise"),
          covariate.labels = c("Land Gini", "Democratic Seat Share", "Voting Restrictions", "South", "Land Gini x South", "Land Gini x Democratic Seat Share", "Land Gini x Voting Restrictions"),
          se = list(cl.us.s[,2], cl.us.dem[,2], cl.us.dis[,2]), 
          p = list(cl.us.s[,4], cl.us.dem[,4], cl.us.dis[,4]),
          dep.var.labels = "Local Tax Revenue per capita (log)",
          title = "The Moderating Role of Political Uncertainty: Landholding Inequality and Local Fiscal Capacity in the United States",
          omit.stat = c("f", "ser", "adj.rsq"), omit = c("Constant"))

rm(fit.us.s, fit.us.dem, fit.us.dis, cl.us.s, cl.us.dem, cl.us.dis)

# Figure F.1: Marginal Effects: Estimated Land Gini Coefficient by Level of Political Uncertainty

# Marginal effects: (a) South
est_south <- felm(ltaxc_pcy ~  giniy*south + vfmagr_pfy  + log(county_area) + ubp_sharey + ngp_sharey + pslave1860 
                  + longitude + latitude + I(longitude^2) + I(latitude^2) | state_fips | 0 | state_fips, data = data_us)

beta.hat <- coef(est_south) 
cov <- vcov(est_south) 
z0 <- c(0, 1)
dy.dx <- beta.hat["giniy"] + beta.hat["giniy:south"]*z0
se.dy.dx <- sqrt(cov["giniy", "giniy"] + z0^2*cov["giniy:south", "giniy:south"] + 2*z0*cov["giniy", "giniy:south"])
upr <- dy.dx + 1.96*se.dy.dx
lwr <- dy.dx - 1.96*se.dy.dx

ggplot(data=NULL,aes(x=z0, y=dy.dx)) + 
  labs(x="South", y="Marginal Effects of Land Gini",
       title="") +
  geom_pointrange(aes(ymin=lwr, ymax=upr)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  theme_classic() + scale_x_discrete(limits = c(0, 1)) +
  theme(axis.title.x = element_text(size=20),
        axis.title.y = element_text(size=20),
        axis.text.x = element_text(size = 15))


# Marginal effects: (b) Democratic Seat Share
est_dem <- felm(ltaxc_pcy ~  giniy*dem_seat_share_house + south + vfmagr_pfy + log(county_area) + ubp_sharey + ngp_sharey + pslave1860
                + longitude + latitude + I(longitude^2) + I(latitude^2) | state_fips | 0 | state_fips, data = data_us)

beta.hat <- coef(est_dem)
vcov1 <- vcov(est_dem) 
z0 <- seq(min(data_us$dem_seat_share_house,  na.rm =T), max(data_us$dem_seat_share_house, na.rm =T), length.out = 1000)
dy.dx <- beta.hat["giniy"] + beta.hat["giniy:dem_seat_share_house"]*z0
se.dy.dx <- sqrt(vcov1["giniy", "giniy"] + (z0^2)*vcov1["giniy:dem_seat_share_house", "giniy:dem_seat_share_house"] + 2*z0*vcov1["giniy", "giniy:dem_seat_share_house"])
upr <- dy.dx + 1.96*se.dy.dx
lwr <- dy.dx - 1.96*se.dy.dx

ggplot(data=NULL, aes(x=z0, y=dy.dx)) + 
  labs(x="Democratic Party Seats in the Legislature (%)", y="Marginal Effects of Land Gini", 
       title= "", 
       cex = 4) +
  geom_line(aes(z0, dy.dx), color = "firebrick3") +
  geom_hline(yintercept=0, linetype = "dashed", color = "dodgerblue3") +
  geom_ribbon(aes(ymin=lwr, ymax=upr), alpha=0.3, fill = "grey60") + 
  theme(legend.position = "none",
        axis.title = element_text(size=15),
        axis.text.x = element_text(size=13), 
        axis.text.y = element_text(size=13), 
        plot.background = element_rect(fill = "white"), 
        panel.background = element_rect(fill = "white", colour = "white", size = 0.4, linetype = "solid"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(size=.3, color="azure2"), 
        panel.grid.minor = element_blank() )

# Marginal effects: (c) Voting Restrictions
est_dis <- felm(ltaxc_pcy ~  giniy*disenfranchise + south + vfmagr_pfy  + log(county_area) + ubp_sharey + ngp_sharey + pslave1860 
                + longitude + latitude + I(longitude^2) + I(latitude^2) | state_fips | 0 | state_fips, data = data_us)

beta.hat <- coef(est_dis) 
cov <- vcov(est_dis) 
z0 <- c(0, 1)
dy.dx <- beta.hat["giniy"] + beta.hat["giniy:disenfranchise"]*z0
se.dy.dx <- sqrt(cov["giniy", "giniy"] + z0^2*cov["giniy:disenfranchise", "giniy:disenfranchise"] + 2*z0*cov["giniy", "giniy:disenfranchise"])
upr <- dy.dx + 1.96*se.dy.dx
lwr <- dy.dx - 1.96*se.dy.dx

ggplot(data=NULL,aes(x=z0, y=dy.dx)) + 
  labs(x="Voting Restrictions", y="Marginal Effects of Land Gini",
       title="") +
  geom_pointrange(aes(ymin=lwr, ymax=upr)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  theme_classic() + scale_x_discrete(limits=c(0, 1)) +
  theme(axis.title.x = element_text(size=20),
        axis.title.y = element_text(size=20),
        axis.text.x = element_text(size = 15) )

rm(cov, data_us, est_dem, est_dis, est_south, beta.hat, dy.dx, lwr, upr, z0, se.dy.dx)
